Use of org.apache.hive.hcatalog.mapreduce.OutputJobInfo in project hive by apache.
From class HCatOutputFormatWriter, method prepareWrite:
@Override
public WriterContext prepareWrite() throws HCatException {
  OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), we.getTableName(), we.getPartitionKVs());
  Job job;
  try {
    job = new Job(conf);
    HCatOutputFormat.setOutput(job, jobInfo);
    HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
    HCatOutputFormat outFormat = new HCatOutputFormat();
    outFormat.checkOutputSpecs(job);
    outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
        job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
      .setupJob(job);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  WriterContextImpl cntxt = new WriterContextImpl();
  cntxt.setConf(job.getConfiguration());
  return cntxt;
}
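For context, prepareWrite() is normally reached through HCatalog's data-transfer API rather than called directly. The following is a minimal sketch, assuming a hypothetical table default.mytable with a partition key part, of how a master process might obtain a WriterContext through DataTransferFactory and HCatWriter; the context is then serialized and shipped to the slave processes that do the actual writing.

import java.util.HashMap;
import java.util.Map;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;

public class PrepareWriteExample {
  public static void main(String[] args) throws HCatException {
    // Hypothetical target: database "default", table "mytable", partition part=1.
    Map<String, String> partitionKVs = new HashMap<String, String>();
    partitionKVs.put("part", "1");
    WriteEntity entity = new WriteEntity.Builder()
        .withDatabase("default")
        .withTable("mytable")
        .withPartition(partitionKVs)
        .build();

    // Client-side configuration (e.g. hive.metastore.uris) would go here.
    Map<String, String> config = new HashMap<String, String>();

    // The returned HCatWriter (in practice an HCatOutputFormatWriter) prepares
    // the write; prepareWrite() is the method shown above and yields a
    // WriterContext for the slave writers.
    HCatWriter masterWriter = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = masterWriter.prepareWrite();

    // Slaves: DataTransferFactory.getHCatWriter(context).write(recordIterator);

    // The master commits (or aborts) once all slaves have finished writing.
    masterWriter.commit(context);
  }
}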
Use of org.apache.hive.hcatalog.mapreduce.OutputJobInfo in project hive by apache.
From class HCatStorer, method setStoreLocation:
/**
 * @param location databaseName.tableName
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  Configuration config = job.getConfiguration();
  config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
  Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign });
  String[] userStr = location.split("\\.");
  if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
    for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements(); ) {
      PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
    }
    Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
    if (crd != null) {
      job.getCredentials().addAll(crd);
    }
  } else {
    Job clone = new Job(job.getConfiguration());
    OutputJobInfo outputJobInfo;
    if (userStr.length == 2) {
      outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
    } else if (userStr.length == 1) {
      outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
    } else {
      throw new FrontendException("location " + location + " is invalid. It must be of the form [db.]table",
          PigHCatUtil.PIG_EXCEPTION_CODE);
    }
    Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
    if (schema != null) {
      pigSchema = schema;
    }
    if (pigSchema == null) {
      throw new FrontendException("Schema for data cannot be determined.", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
    String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
    if (externalLocation != null) {
      outputJobInfo.setLocation(externalLocation);
    }
    try {
      HCatOutputFormat.setOutput(job, outputJobInfo);
    } catch (HCatException he) {
      // information passed to HCatOutputFormat was not right
      throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
    }
    HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    try {
      doSchemaValidations(pigSchema, hcatTblSchema);
    } catch (HCatException he) {
      throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
    }
    computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
    HCatOutputFormat.setSchema(job, computedSchema);
    udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));
    // Store all the new/changed properties of the job in the udf context, so
    // the HCatOutputFormat.setOutput and setSchema methods need not be called
    // many times.
    for (Entry<String, String> keyValue : job.getConfiguration()) {
      String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
      if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
        udfProps.put(keyValue.getKey(), keyValue.getValue());
      }
    }
    // Store credentials in a private hash map and not the udf context to
    // make sure they are not public.
    jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
    udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
  }
}
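setStoreLocation() is invoked by Pig when a script stores an alias through HCatStorer, with the location string in the "[db.]table" form checked above. As a rough illustration, here is a sketch using Pig's embedded PigServer API; the input file, table name default.mytable, and partition value dt=2015-01-01 are hypothetical, and the target table would have to exist in the metastore already.

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;

public class HCatStorerExample {
  public static void main(String[] args) throws Exception {
    // Local execution for illustration; a real run would use a cluster ExecType
    // and a hive-site.xml so HCatalog can reach the metastore.
    PigServer pig = new PigServer(ExecType.LOCAL);

    // The loaded schema must be reconcilable with the table schema; HCatStorer
    // validates this in setStoreLocation() via doSchemaValidations().
    pig.registerQuery("A = LOAD 'input.tsv' AS (name:chararray, amount:int);");

    // The STORE location is "[db.]table"; the constructor argument carries the
    // static partition key/values that end up in the partitions map used above.
    pig.registerQuery("STORE A INTO 'default.mytable' "
        + "USING org.apache.hive.hcatalog.pig.HCatStorer('dt=2015-01-01');");
  }
}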