Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
From the class HCatBaseInputFormat, method getTableSchema.
/**
 * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call
 * on the specified job context. This information is available only after HCatInputFormat.setInput
 * has been called for a JobContext.
 * @param conf the Configuration object
 * @return the table schema
 * @throws IOException if HCatInputFormat.setInput has not been called
 *                     for the current context
 */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
  InputJobInfo inputJobInfo = getJobInfo(conf);
  HCatSchema allCols = new HCatSchema(new LinkedList<HCatFieldSchema>());
  for (HCatFieldSchema field : inputJobInfo.getTableInfo().getDataColumns().getFields()) {
    allCols.append(field);
  }
  for (HCatFieldSchema field : inputJobInfo.getTableInfo().getPartitionColumns().getFields()) {
    allCols.append(field);
  }
  return allCols;
}
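For reference, a minimal driver-side sketch of calling getTableSchema; the database and table names are placeholders, and the job setup is abbreviated:

Job job = new Job(conf, "schema-demo");
// setInput must run first, or getTableSchema throws IOException
HCatInputFormat.setInput(job, "default", "my_table");
HCatSchema schema = HCatBaseInputFormat.getTableSchema(job.getConfiguration());
for (HCatFieldSchema field : schema.getFields()) {
  // prints both data columns and partition columns, in that order
  System.out.println(field.getName() + " : " + field.getTypeString());
}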
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
From the class HCatOutputFormat, method setOutput.
/**
 * Sets the information about the output to write for the job. This queries the metadata server
 * to find the StorageHandler to use for the table. It throws an error if the
 * partition is already published.
 * @param conf the Configuration object
 * @param credentials the Credentials object
 * @param outputJobInfo the table output information for the job
 * @throws IOException on an error communicating with the metadata server
 */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
    List<String> indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE);
    for (String indexName : indexList) {
      Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName);
      if (!index.isDeferredRebuild()) {
        throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported");
      }
    }
    StorageDescriptor sd = table.getTTable().getSd();
    if (sd.isCompressed()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
    }
    if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
    }
    if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
    }
    // Set up a common id hash for this job, so that any temporary directory
    // created later on is guaranteed to be unique.
    String idHash;
    if ((idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH)) == null) {
      idHash = String.valueOf(Math.random());
    }
    conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);
    if (table.getTTable().getPartitionKeysSize() == 0) {
      if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
        // attempt made to save partition values in a non-partitioned table - throw an error
        throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
      }
      // non-partitioned table
      outputJobInfo.setPartitionValues(new HashMap<String, String>());
    } else {
      // partitioned table, so we expect partition values;
      // convert the user-specified map to have lower-case key names
      Map<String, String> valueMap = new HashMap<String, String>();
      if (outputJobInfo.getPartitionValues() != null) {
        for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
          valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
        }
      }
      if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
        // dynamic partitioning use case - partition values were null, or not all were
        // specified; figure out which keys are not specified
        List<String> dynamicPartitioningKeys = new ArrayList<String>();
        for (FieldSchema fs : table.getPartitionKeys()) {
          if (!valueMap.containsKey(fs.getName().toLowerCase())) {
            dynamicPartitioningKeys.add(fs.getName().toLowerCase());
          }
        }
        if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
          // if this isn't equal, then bogus key values have been inserted; error out
          throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
        }
        outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);
        String dynHash;
        if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
          dynHash = String.valueOf(Math.random());
        }
        conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
        // if a custom pattern is set for dynamic partitioning, configure the custom path
        String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
        if (customPattern != null) {
          HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
        }
      }
      outputJobInfo.setPartitionValues(valueMap);
    }
    // To get around hbase failure on single node, see BUG-4383
    conf.set("dfs.client.read.shortcircuit", "false");
    HCatSchema tableSchema = HCatUtil.extractSchema(table);
    StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
    List<String> partitionCols = new ArrayList<String>();
    for (FieldSchema schema : table.getPartitionKeys()) {
      partitionCols.add(schema.getName());
    }
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);
    // Serialize the output info into the configuration
    outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
    outputJobInfo.setOutputSchema(tableSchema);
    harRequested = getHarRequested(hiveConf);
    outputJobInfo.setHarRequested(harRequested);
    maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
    outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
    HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);
    Path tblPath = new Path(table.getTTable().getSd().getLocation());
    /* Set the umask in conf such that files/dirs get created with table-dir
     * permissions. The following three assumptions are made:
     * 1. Actual file/dir creation is done by the RecordWriter of the underlying
     *    output format; it is assumed to use default permissions at creation.
     * 2. Default permissions = FsPermission.getDefault() = 777.
     * 3. The umask is honored by the underlying filesystem.
     */
    FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));
    if (Security.getInstance().isSecurityEnabled()) {
      Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
    }
  } catch (Exception e) {
    if (e instanceof HCatException) {
      throw (HCatException) e;
    } else {
      throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
    }
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
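A hedged sketch of how a driver reaches this code; callers typically go through the HCatOutputFormat.setOutput(Job, OutputJobInfo) overload used in the examples below, and "default", "target_table", and the "datestamp" partition key are placeholder names:

// static partition values for the write; for dynamic partitioning,
// omit some or all keys (or pass null)
Map<String, String> partitionValues = new HashMap<String, String>();
partitionValues.put("datestamp", "20100101");
Job job = new Job(conf, "setoutput-demo");
HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "target_table", partitionValues));
// the output schema must still be set explicitly before the job is submitted
HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job));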
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
From the class GroupByAge, method run.
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  args = new GenericOptionsParser(conf, args).getRemainingArgs();
  String serverUri = args[0];
  String inputTableName = args[1];
  String outputTableName = args[2];
  String dbName = null;
  String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
  if (principalID != null) {
    conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
  }
  Job job = new Job(conf, "GroupByAge");
  // initialize HCatInputFormat
  HCatInputFormat.setInput(job, dbName, inputTableName);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setJarByClass(GroupByAge.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(WritableComparable.class);
  job.setOutputValueClass(DefaultHCatRecord.class);
  // initialize HCatOutputFormat
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
  HCatSchema s = HCatOutputFormat.getTableSchema(job);
  System.err.println("INFO: output schema explicitly set for writing: " + s);
  HCatOutputFormat.setSchema(job, s);
  job.setOutputFormatClass(HCatOutputFormat.class);
  return (job.waitForCompletion(true) ? 0 : 1);
}
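The run method follows the Hadoop Tool pattern (the getConf() call suggests GroupByAge extends Configured and implements Tool); a hedged sketch of the corresponding main, using org.apache.hadoop.util.ToolRunner:

public static void main(String[] args) throws Exception {
  // ToolRunner parses generic options and hands the Configuration to run()
  System.exit(ToolRunner.run(new Configuration(), new GroupByAge(), args));
}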
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
From the class StoreNumbers, method main.
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  args = new GenericOptionsParser(conf, args).getRemainingArgs();
  String[] otherArgs = new String[2];
  int j = 0;
  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-libjars")) {
      // the generic options parser doesn't seem to work!
      conf.set("tmpjars", args[i + 1]);
      // skip the value as well; the for loop moves past it
      i = i + 1;
    } else {
      otherArgs[j++] = args[i];
    }
  }
  // otherArgs.length is fixed at 2, so check how many slots were actually filled
  if (j != 2) {
    usage();
  }
  String serverUri = otherArgs[0];
  if (otherArgs[1] == null || (!otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart") && !otherArgs[1].equalsIgnoreCase("nopart_pig"))) {
    usage();
  }
  boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part"));
  boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig"));
  String tableName = NUMBERS_TABLE_NAME;
  String dbName = "default";
  Map<String, String> outputPartitionKvps = new HashMap<String, String>();
  String outputTableName = null;
  conf.set(IS_PIG_NON_PART_TABLE, "false");
  if (writeToPartitionedTable) {
    outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
    outputPartitionKvps.put("datestamp", "20100101");
  } else {
    if (writeToNonPartPigTable) {
      conf.set(IS_PIG_NON_PART_TABLE, "true");
      outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME;
    } else {
      outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME;
    }
    // test with null or empty partition values, chosen randomly
    if (new Random().nextInt(2) == 0) {
      outputPartitionKvps = null;
    }
  }
  String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
  if (principalID != null) {
    conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
  }
  Job job = new Job(conf, "storenumbers");
  // initialize HCatInputFormat
  HCatInputFormat.setInput(job, dbName, tableName);
  // initialize HCatOutputFormat
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, outputPartitionKvps));
  HCatSchema s = HCatInputFormat.getTableSchema(job);
  if (writeToNonPartPigTable) {
    List<HCatFieldSchema> newHfsList = new ArrayList<HCatFieldSchema>();
    // change smallint and tinyint to int (Pig has no smallint/tinyint types)
    for (HCatFieldSchema hfs : s.getFields()) {
      if (hfs.getTypeString().equals("smallint")) {
        newHfsList.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
      } else if (hfs.getTypeString().equals("tinyint")) {
        newHfsList.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
      } else {
        newHfsList.add(hfs);
      }
    }
    s = new HCatSchema(newHfsList);
  }
  HCatOutputFormat.setSchema(job, s);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(HCatOutputFormat.class);
  job.setJarByClass(StoreNumbers.class);
  job.setMapperClass(SumMapper.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputValueClass(DefaultHCatRecord.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
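The smallint/tinyint widening loop above could be factored into a helper; a sketch using only the HCatSchema and HCatFieldSchema calls shown in the listing (the name widenToInt is my own, not part of the original class):

private static HCatSchema widenToInt(HCatSchema schema) throws HCatException {
  List<HCatFieldSchema> widened = new ArrayList<HCatFieldSchema>();
  for (HCatFieldSchema hfs : schema.getFields()) {
    String type = hfs.getTypeString();
    if (type.equals("smallint") || type.equals("tinyint")) {
      // Pig has no 8-bit or 16-bit integer types, so both widen to int
      widened.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
    } else {
      widened.add(hfs);
    }
  }
  return new HCatSchema(widened);
}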
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
From the class WriteRC, method run.
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  args = new GenericOptionsParser(conf, args).getRemainingArgs();
  String serverUri = args[0];
  String inputTableName = args[1];
  String outputTableName = args[2];
  String dbName = null;
  String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
  if (principalID != null) {
    conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
  }
  Job job = new Job(conf, "WriteRC");
  // initialize HCatInputFormat
  HCatInputFormat.setInput(job, dbName, inputTableName);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setJarByClass(WriteRC.class);
  job.setMapperClass(Map.class);
  job.setOutputKeyClass(WritableComparable.class);
  job.setOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);
  // initialize HCatOutputFormat
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
  HCatSchema s = HCatInputFormat.getTableSchema(job);
  System.err.println("INFO: output schema explicitly set for writing: " + s);
  HCatOutputFormat.setSchema(job, s);
  job.setOutputFormatClass(HCatOutputFormat.class);
  return (job.waitForCompletion(true) ? 0 : 1);
}
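The Map class wired in above is not shown in this listing; for a map-only copy job like this one, a pass-through mapper along the following lines would match the configured key/value classes (a hedged sketch, not the original implementation):

public static class Map extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord> {
  @Override
  protected void map(WritableComparable key, HCatRecord value, Context context)
      throws IOException, InterruptedException {
    // emit each record unchanged; HCatOutputFormat generally ignores the key
    context.write(null, value);
  }
}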