Use of org.apache.hive.hcatalog.mapreduce.InputJobInfo in project hive by apache: the class HCatLoader, method setLocation.
Pig calls setLocation on both the frontend and the backend. The method points HCatInputFormat at the requested database and table, caches the resulting configuration and credentials in the UDFContext under the loader's signature so that HCatInputFormat.setInput is not repeated, and pushes any column projection down to the input format.
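A minimal sketch of how this loader is driven follows. It is hypothetical: Pig performs these calls internally when a script runs LOAD 'db.table' USING org.apache.hive.hcatalog.pig.HCatLoader(); the signature string and table name below are made up, and a reachable metastore is assumed because setLocation calls HCatInputFormat.setInput on the frontend.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.pig.HCatLoader;

public class HCatLoaderCallSequenceSketch {
  public static void main(String[] args) throws Exception {
    HCatLoader loader = new HCatLoader();
    Job job = Job.getInstance(new Configuration());
    // Pig assigns a per-LOAD signature before the location calls; this string is made up.
    loader.setUDFContextSignature("hcatloader_sig_1");
    // The location is the "dbname.tablename" string from the Pig LOAD statement.
    loader.setLocation("default.my_table", job);
  }
}

The project's setLocation implementation follows.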
@Override
public void setLocation(String location, Job job) throws IOException {
HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get().setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);
UDFContext udfContext = UDFContext.getUDFContext();
Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);
Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
dbName = dbTablePair.first;
tableName = dbTablePair.second;
RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
// If an earlier setLocation call has already stored the HCatalog configuration in the
// UDFContext properties, copy it back into the job Configuration instead of calling
// HCatInputFormat.setInput (and hence the metastore) again.
if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements(); ) {
PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(), emr.nextElement().toString());
}
if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
// Combine credentials; credentials from the job take precedence for freshness
Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
job.getCredentials().addAll(crd);
}
} else {
Job clone = new Job(job.getConfiguration());
HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());
InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(), inputJobInfo.getTableInfo());
// Store all new/changed properties from the job configuration in the UDFContext so that
// HCatInputFormat.setInput need not be called many times.
for (Entry<String, String> keyValue : job.getConfiguration()) {
String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
if ((oldValue == null) || !keyValue.getValue().equals(oldValue)) {
udfProps.put(keyValue.getKey(), keyValue.getValue());
}
}
udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);
// Store credentials in a private hash map, not the UDFContext, to make sure they are not public.
Credentials crd = new Credentials();
crd.addAll(job.getCredentials());
jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
}
if (requiredFieldsInfo != null) {
// convert to hcatschema and pass to HCatInputFormat
try {
// Push the projection down to the columnar storage formats; this works for RCFile and ORC.
ArrayList<Integer> list = new ArrayList<Integer>(requiredFieldsInfo.getFields().size());
for (RequiredField rf : requiredFieldsInfo.getFields()) {
list.add(rf.getIndex());
}
ColumnProjectionUtils.setReadColumns(job.getConfiguration(), list);
outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
HCatInputFormat.setOutputSchema(job, outputSchema);
} catch (Exception e) {
throw new IOException(e);
}
} else {
// else: Pig's optimizer never invoked pushProjection, so all fields are needed.
// Mark all columns for reading, and on the backend set the output schema to the
// full table schema that was stored in the UDFContext.
ColumnProjectionUtils.setReadAllColumns(job.getConfiguration());
if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
try {
HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
outputSchema = hcatTableSchema;
HCatInputFormat.setOutputSchema(job, outputSchema);
} catch (Exception e) {
throw new IOException(e);
}
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("outputSchema=" + outputSchema);
}
}
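The method above is essentially a Pig-facing wrapper over HCatInputFormat. Below is a minimal sketch, not taken from the project, of the same frontend calls made from a plain MapReduce driver; the job name, database, and table are placeholders, a null partition filter is assumed, and the metastore connection settings are assumed to be available on the Configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class HCatInputFormatFrontendSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "hcat-read-sketch");
    // Frontend-only call: contacts the metastore and stores a serialized InputJobInfo
    // in the job configuration under HCatConstants.HCAT_KEY_JOB_INFO, which is what
    // setLocation later deserializes.
    HCatInputFormat.setInput(job, "default", "my_table", null);
    job.setInputFormatClass(HCatInputFormat.class);
    // A mapper, reducer, and output format would be configured here before submission.
  }
}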
Use of org.apache.hive.hcatalog.mapreduce.InputJobInfo in project hive by apache: the class HCatLoader, method getStatistics.
/**
* Get statistics about the data to be loaded. Only input data size is implemented at this time.
*/
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
try {
ResourceStatistics stats = new ResourceStatistics();
InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024);
return stats;
} catch (Exception e) {
throw new IOException(e);
}
}
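ResourceStatistics.setmBytes expects a size in megabytes, which is why the byte count returned by getSizeInBytes is divided by 1024 twice. A minimal sketch of the InputJobInfo round trip this method depends on follows; it assumes the Job has already been configured through HCatInputFormat.setInput (for example via the setLocation call above).

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.mapreduce.InputJobInfo;

public class InputJobInfoRoundTripSketch {
  // Reads back the InputJobInfo that HCatInputFormat.setInput serialized into the
  // job configuration, mirroring the deserialization done in getStatistics above.
  static InputJobInfo readJobInfo(Job job) throws IOException {
    return (InputJobInfo) HCatUtil.deserialize(
        job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
  }
}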